home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Plus Leser 15
/
Amiga Plus Leser CD 15.iso
/
Tools
/
Development
/
PluginSRC_EvenMore
/
converthtml.e
< prev
next >
Wrap
Text File
|
2002-03-12
|
18KB
|
670 lines
/*
HTML to text converter
EvenMore FileIO plugin
Author: Chris Perver
Copyright (c) 2001
*/
OPT MODULE, REG=5
MODULE 'tools/ctype'
/*
  convhtml - convert HTML held in memory to plain text with ANSI styling.

  memadr  : source buffer containing the HTML
  begin   : index at which both reading and writing start
  lenadr  : number of valid bytes in memadr (reading stops here)
  mem2    : destination buffer; when NIL the conversion is done in place
            in memadr and lenadr2 is taken from lenadr
  lenadr2 : size of the destination buffer; checked once per input
            character, so a single tag may still write past it
            (NOTE(review): callers should leave slack - <HR> alone emits
            wordwrap + 4 bytes)

  Returns the destination buffer and the index one past the last byte
  written.  A linefeed is additionally stored at that index.

  NOTE(review): in-place use can overwrite input that has not been read
  yet whenever the output grows (escape sequences, <HR>, inserted line
  breaks) - confirm callers pass a separate mem2 for such documents.
*/
EXPORT PROC convhtml(memadr:PTR TO CHAR, begin, lenadr, mem2 = NIL:PTR TO CHAR, lenadr2 = NIL)
  DEF count, count2
  DEF html[256]:STRING, wordwrap = 80, cchar = 0, ww = TRUE, oldpos, i, j, n
  DEF tagend, brk, prev
  DEF pre = FALSE, iscomm = FALSE

  IF mem2 = NIL
    mem2 := memadr
    lenadr2 := lenadr
  ENDIF
  count := begin
  count2 := begin

  WHILE (count < lenadr) AND (count2 < lenadr2)
    SELECT 256 OF memadr[count]

    -> Character entities
    CASE "&"
      -> Look at no more than 10 characters and never past the input end
      n := lenadr - count - 1
      IF n > 10 THEN n := 10
      j := -1
      IF n > 0
        UpperStr(StrCopy(html, memadr + count + 1, n))
        j := InStr(html, ';')
      ENDIF
      IF j = -1
        -> No terminating ';' nearby: this is a plain ampersand
        mem2[count2++] := "&"
        INC cchar
        INC count
      ELSE
        IF InStr(html, 'NBSP') = 0
          mem2[count2++] := " "
          INC cchar
        ELSEIF InStr(html, 'AMP') = 0
          mem2[count2++] := "&"
          INC cchar
        ELSEIF InStr(html, 'QUOT') = 0
          mem2[count2++] := $22
          INC cchar
        ELSEIF InStr(html, 'LT') = 0
          mem2[count2++] := "<"
          INC cchar
        ELSEIF InStr(html, 'GT') = 0
          mem2[count2++] := ">"
          INC cchar
        ENDIF
        -> Unknown entities are dropped; continue after the ';'
        count := count + j + 2
      ENDIF

    -> LF and CR ("\b" is the carriage return escape in E)
    CASE "\n", "\b"
      IF pre = FALSE
        -> Outside <PRE> a line end collapses to a single space, unless
        -> a space was just written or a tag was just processed
        prev := " "
        IF count2 > 0 THEN prev := mem2[count2 - 1]
        IF prev <> " "
          IF iscomm = FALSE
            mem2[count2++] := " "
          ENDIF
        ENDIF
        -> Skip the indentation of the following source line
        count := skipover(memadr, count + 1, lenadr, " ")
      ELSE
        -> Inside <PRE> line ends and the following indentation are kept
        mem2[count2++] := memadr[count]
        cchar := 0
        INC count
      ENDIF

    -> Runs of spaces
    CASE " "
      IF pre = FALSE
        prev := " "
        IF count2 > 0 THEN prev := mem2[count2 - 1]
        IF prev <> " "
          mem2[count2++] := " "
          INC cchar
        ENDIF
        -> count still addresses a space here, so this always advances
        count := skipover(memadr, count, lenadr, " ")
      ELSE
        mem2[count2++] := memadr[count++]
        INC cchar
      ENDIF

    -> Tags
    CASE "<"
      iscomm := TRUE
      INC count
      IF count < lenadr
        -> tagend is the index of the closing '>' (or lenadr if missing),
        -> i is the index at which text resumes after the tag
        tagend := scanto(memadr, count, lenadr, ">")
        i := tagend + 1
        -> html receives the upper-cased tag text including the '>'
        n := tagend - count
        IF tagend < lenadr THEN INC n
        UpperStr(StrCopy(html, memadr + count, n))

        -> A tag that matches no branch below leaves count untouched, so
        -> its text is emitted as ordinary characters
        SELECT 256 OF memadr[count]

        -> Closing tags
        CASE "/"
          iscomm := FALSE
          IF memadr[count + 2] = ">"
            -> Single-letter closing tags: undo the matching style
            SELECT 256 OF memadr[count + 1]
            CASE "a", "A"
              count2 := emitseq(mem2, count2, '\e[24;31m')
            CASE "b", "B"
              count2 := emitseq(mem2, count2, '\e[21m')
            CASE "i", "I"
              count2 := emitseq(mem2, count2, '\e[23m')
            CASE "u", "U"
              count2 := emitseq(mem2, count2, '\e[24m')
            CASE "p", "P"
              mem2[count2++] := "\n"
              cchar := 0
            ENDSELECT
            count := i
          ELSE
            SELECT 256 OF memadr[count + 1]
            -> /ADDRESS
            CASE "a", "A"
              IF InStr(html, '/ADDRESS') = 0
                count2 := emitseq(mem2, count2, '\e[21;23m')
              ENDIF
            -> /CITE, /CENTER
            CASE "c", "C"
              IF InStr(html, '/CITE') = 0
                count2 := emitseq(mem2, count2, '\e[23m')
              ELSEIF InStr(html, '/CENTER') = 0
                mem2[count2++] := "\n"
                cchar := 0
              ENDIF
            -> /DD
            CASE "d", "D"
              IF InStr(html, '/DD') = 0
                cchar := 0
              ENDIF
            -> /H1 .. /H6
            CASE "h", "H"
              IF InStr(html, '/H') = 0
                -> E evaluates strictly left to right, so each comparison
                -> needs its own parentheses
                IF (memadr[count + 2] >= "1") AND (memadr[count + 2] <= "6")
                  count2 := emitseq(mem2, count2, '\n\n')
                  cchar := 0
                ENDIF
              ENDIF
            -> /PRE, /P...
            CASE "p", "P"
              IF InStr(html, '/PRE') = 0
                mem2[count2++] := "\n"
                cchar := 0
                pre := FALSE
              ELSEIF InStr(html, '/P') = 0
                mem2[count2++] := "\n"
                cchar := 0
              ENDIF
            -> /TD
            CASE "t", "T"
              IF InStr(html, '/TD') = 0
                mem2[count2++] := "\n"
                cchar := 0
              ENDIF
            -> /STRONG
            CASE "s", "S"
              IF InStr(html, '/STRONG') = 0
                count2 := emitseq(mem2, count2, '\e[21m')
              ENDIF
            -> /VAR
            CASE "v", "V"
              IF InStr(html, '/VAR') = 0
                count2 := emitseq(mem2, count2, '\e[23m')
              ENDIF
            ENDSELECT
            -> Every other closing tag is skipped silently
            count := i
          ENDIF

        -> Comment / declaration: skipped up to the first '>'
        CASE "!"
          count := i

        -> A HREF, A NAME, AREA, ADDRESS
        CASE "a", "A"
          IF InStr(html, 'HREF') > 0
            count := i
            count2 := emitseq(mem2, count2, '\e[4;33m')
          ELSEIF InStr(html, 'NAME') > 0
            count := i
          ELSEIF InStr(html, 'AREA') = 0
            count := i
          ELSEIF InStr(html, 'ADDRESS') = 0
            count2 := emitseq(mem2, count2, '\e[1;3m')
            count := i
          ENDIF

        -> BODY, BASE, BR; anything else starting with B is taken as bold
        CASE "b", "B"
          IF InStr(html, 'BODY') = 0
            count := i
          ELSEIF InStr(html, 'BASE') = 0
            count := i
          ELSEIF InStr(html, 'BR') = 0
            mem2[count2++] := "\n"
            cchar := 0
            count := i
          ELSE
            count2 := emitseq(mem2, count2, '\e[1m')
            count := i
          ENDIF

        -> CENTER, CODE, CITE
        CASE "c", "C"
          IF InStr(html, 'CENTER') = 0
            count := i
          ELSEIF InStr(html, 'CODE') = 0
            count := i
          ELSEIF InStr(html, 'CITE') = 0
            count := i
            count2 := emitseq(mem2, count2, '\e[3m')
          ENDIF

        -> DD, DL, DT, DIV
        CASE "d", "D"
          IF InStr(html, 'DD') = 0
            mem2[count2++] := "\n"
            cchar := 0
            count := i
          ELSEIF InStr(html, 'DL') = 0
            mem2[count2++] := "\n"
            cchar := 0
            count := i
          ELSEIF InStr(html, 'DT') = 0
            count := i
          ELSEIF InStr(html, 'DIV') = 0
            mem2[count2++] := "\n"
            -> A new line starts here, so the wrap column restarts too
            cchar := 0
            count := i
          ENDIF

        -> EM
        CASE "e", "E"
          IF InStr(html, 'EM') = 0
            count := i
          ENDIF

        -> FONT, FRAME, FORM
        CASE "f", "F"
          IF InStr(html, 'FONT') = 0
            count := i
          ELSEIF InStr(html, 'FRAME') = 0
            count := i
          ELSEIF InStr(html, 'FORM') = 0
            count := i
          ENDIF

        -> H1 .. H6, HTML, HEAD, HR
        CASE "h", "H"
          IF (memadr[count + 1] >= "1") AND (memadr[count + 1] <= "6")
            count2 := emitseq(mem2, count2, '\n\e[1m')
            cchar := 0
            count := i
          ELSEIF InStr(html, 'HTML') = 0
            count := i
          ELSEIF InStr(html, 'HEAD') = 0
            count := i
          ELSEIF InStr(html, 'HR') = 0
            -> Horizontal rule: a full-width line of dashes
            count2 := emitseq(mem2, count2, '\n\n')
            FOR j := 1 TO wordwrap
              mem2[count2++] := "-"
            ENDFOR
            count2 := emitseq(mem2, count2, '\n\n')
            cchar := 0
            count := i
          ENDIF

        -> I, IMG, INPUT, IFRAME, ISINDEX, ILAYER
        CASE "i", "I"
          IF memadr[count + 1] = ">"
            -> Italic
            count2 := emitseq(mem2, count2, '\e[3m')
            count := count + 2
          ELSE
            IF InStr(html, 'IMG') = 0
              -> Images are shown as [alt text]
              mem2[count2++] := "["
              INC cchar
              j := InStr(html, 'ALT')
              IF j >= 0
                -> Only a quoted value inside this tag is copied
                oldpos := count2
                count2 := copyquoted(memadr, count + j + 1, tagend, mem2, count2)
                cchar := cchar + (count2 - oldpos)
              ENDIF
              count := i
              mem2[count2++] := "]"
              INC cchar
            ELSEIF InStr(html, 'INPUT') = 0
              count2 := emitseq(mem2, count2, '\e[4;33m')
              -> InStr() yields -1 when the text is absent: show the VALUE
              -> of every input that is not marked HIDDEN
              IF InStr(html, 'HIDDEN') = -1
                j := InStr(html, 'VALUE')
                IF j >= 0
                  oldpos := count2
                  count2 := copyquoted(memadr, count + j + 1, tagend, mem2, count2)
                  cchar := cchar + (count2 - oldpos)
                ENDIF
              ENDIF
              count := i
              count2 := emitseq(mem2, count2, '\e[24;31m')
            ELSEIF InStr(html, 'IFRAME') = 0
              count := i
            ELSEIF InStr(html, 'ISINDEX') = 0
              count := i
            ELSEIF InStr(html, 'ILAYER') = 0
              count := i
            ENDIF
          ENDIF

        -> LI: bullet on a new line
        CASE "l", "L"
          IF InStr(html, 'LI') = 0
            count := i
            count2 := emitseq(mem2, count2, '\no ')
            cchar := 2
          ENDIF

        -> META, MAP
        CASE "m", "M"
          IF InStr(html, 'META') = 0
            count := i
          ELSEIF InStr(html, 'MAP') = 0
            count := i
          ENDIF

        -> NOFRAMES, NOBR, NOLAYER
        CASE "n", "N"
          IF InStr(html, 'NOFRAMES') = 0
            count := i
          ELSEIF InStr(html, 'NOBR') = 0
            count := i
          ELSEIF InStr(html, 'NOLAYER') = 0
            count := i
          ENDIF

        -> O:..., OL, OPTION
        CASE "o", "O"
          IF InStr(html, 'O:') = 0
            count := i
          ELSEIF InStr(html, 'OL') = 0
            count := i
          ELSEIF InStr(html, 'OPTION') = 0
            count2 := emitseq(mem2, count2, '\no ')
            cchar := 2
            count := i
          ENDIF

        -> PRE; anything else starting with P is taken as a paragraph
        CASE "p", "P"
          IF InStr(html, 'PRE') = 0
            mem2[count2++] := "\n"
            cchar := 0
            pre := TRUE
            count := i
          ELSE
            count2 := emitseq(mem2, count2, '\n\n')
            cchar := 0
            count := i
          ENDIF

        -> STRONG, SMALL, SAMP, STYLE, SCRIPT, SPAN, SELECT
        CASE "s", "S"
          IF InStr(html, 'STRONG') = 0
            count2 := emitseq(mem2, count2, '\e[1m')
            count := i
          ELSEIF InStr(html, 'SMALL') = 0
            count := i
          ELSEIF InStr(html, 'SAMP') = 0
            count := i
          ELSEIF InStr(html, 'STYLE') = 0
            -> Discard the style sheet up to and including </STYLE>
            count := skipelement(memadr, i, lenadr, '</STYLE>')
          ELSEIF InStr(html, 'SCRIPT') = 0
            -> Discard the script up to and including </SCRIPT>
            count := skipelement(memadr, i, lenadr, '</SCRIPT>')
          ELSEIF InStr(html, 'SPAN') = 0
            count := i
          ELSEIF InStr(html, 'SELECT') = 0
            count := i
          ENDIF

        -> TITLE, TEXTAREA, TT, TABLE, TR, TD, TH, TBODY
        CASE "t", "T"
          IF InStr(html, 'TITLE') = 0
            -> The title text is dropped: resume at the next tag
            count := scanto(memadr, i, lenadr, "<")
          ELSEIF InStr(html, 'TEXTAREA') = 0
            count := i
          ELSEIF InStr(html, 'TT') = 0
            count := i
          ELSEIF InStr(html, 'TABLE') = 0
            count := i
          ELSEIF InStr(html, 'TR') = 0
            mem2[count2++] := "\n"
            cchar := 0
            count := i
          ELSEIF InStr(html, 'TD') = 0
            count := i
            INC cchar
            mem2[count2++] := "\t"
          ELSEIF InStr(html, 'TH') = 0
            mem2[count2++] := "\t"
            INC cchar
            count := i
          ELSEIF InStr(html, 'TBODY') = 0
            count := i
          ENDIF

        -> U, UL
        CASE "u", "U"
          IF memadr[count + 1] = ">"
            count2 := emitseq(mem2, count2, '\e[4m')
            count := count + 2
          ELSEIF InStr(html, 'UL') = 0
            count := i
          ENDIF

        -> VAR
        CASE "v", "V"
          IF InStr(html, 'VAR') = 0
            count2 := emitseq(mem2, count2, '\e[3m')
            count := i
          ENDIF
        ENDSELECT
      ENDIF

    -> Printable text
    DEFAULT
      iscomm := FALSE
      mem2[count2] := memadr[count++]
      INC cchar
      -> Word wrap once the line has reached the wrap column
      IF ww = TRUE
        IF cchar >= wordwrap
          IF isspace(mem2[count2]) = TRUE
            -> The character itself is whitespace: turn it into the break
            mem2[count2] := "\n"
            cchar := 0
          ELSE
            -> Find the last whitespace written, never going below begin
            brk := lastspace(mem2, count2, begin)
            IF (brk <> -1) AND ((count2 - brk) < wordwrap)
              -> Move the partial word up by one and break in front of it
              j := count2
              WHILE j > brk
                mem2[j + 1] := mem2[j]
                DEC j
              ENDWHILE
              mem2[brk + 1] := "\n"
              cchar := count2 - brk
              INC count2
            ELSE
              -> The word alone fills the line: break right after this
              -> character instead of re-wrapping it again and again
              INC count2
              mem2[count2] := "\n"
              cchar := 0
            ENDIF
          ENDIF
        ENDIF
      ENDIF
      INC count2
    ENDSELECT
  ENDWHILE

  -> Trailing linefeed, stored just past the returned length
  mem2[count2] := "\n"
ENDPROC mem2, count2

-> Copy the NIL-terminated string seq to dest at index pos; returns the
-> index following the last byte written.
PROC emitseq(dest:PTR TO CHAR, pos, seq:PTR TO CHAR)
  DEF k = 0
  WHILE seq[k] <> 0
    dest[pos] := seq[k]
    INC pos
    INC k
  ENDWHILE
ENDPROC pos

-> Index of the first byte equal to ch in mem[pos..limit-1], or limit
-> when there is none (also when pos is already at or past limit).
PROC scanto(mem:PTR TO CHAR, pos, limit, ch)
  WHILE pos < limit
    IF mem[pos] = ch THEN RETURN pos
    INC pos
  ENDWHILE
ENDPROC limit

-> Index of the first byte different from ch in mem[pos..limit-1], or
-> limit when every remaining byte equals ch.
PROC skipover(mem:PTR TO CHAR, pos, limit, ch)
  WHILE pos < limit
    IF mem[pos] <> ch THEN RETURN pos
    INC pos
  ENDWHILE
ENDPROC limit

-> Copy the first double-quoted value found in mem[start..limit-1] to dest
-> at index pos, without the quotes; returns the new destination index.
PROC copyquoted(mem:PTR TO CHAR, start, limit, dest:PTR TO CHAR, pos)
  DEF stop
  start := scanto(mem, start, limit, $22) + 1
  stop := scanto(mem, start, limit, $22)
  WHILE start < stop
    dest[pos] := mem[start]
    INC pos
    INC start
  ENDWHILE
ENDPROC pos

-> Search mem[pos..limit-1] for closetag (given in upper case, compared
-> case-insensitively); returns the index just after it, or limit when
-> the closing tag is missing.
PROC skipelement(mem:PTR TO CHAR, pos, limit, closetag:PTR TO CHAR)
  DEF buf[16]:STRING, n, taglen
  taglen := StrLen(closetag)
  WHILE pos < limit
    pos := scanto(mem, pos, limit, "<")
    IF pos < limit
      n := limit - pos
      IF n > taglen THEN n := taglen
      UpperStr(StrCopy(buf, mem + pos, n))
      IF StrCmp(buf, closetag) THEN RETURN pos + taglen
      INC pos
    ENDIF
  ENDWHILE
ENDPROC limit

-> Index of the last whitespace byte in mem[floor..pos], searching
-> backwards from pos, or -1 when there is none.
PROC lastspace(mem:PTR TO CHAR, pos, floor)
  WHILE pos >= floor
    IF isspace(mem[pos]) <> FALSE THEN RETURN pos
    DEC pos
  ENDWHILE
ENDPROC -1
-><